Load the libraries

library(tidyverse)
library(janitor)
library(naniar)
library(ggmap)
library(shiny)
library("tidyverse")
library("naniar")
library("janitor")
library("stringr")
library(tidyverse)
library(janitor)
library(naniar)
library(tidyverse)
library(skimr)
library(janitor)
library(palmerpenguins)
library(gtools)
library(RColorBrewer)
library(paletteer)
library(ggthemes)
options(scipen = 999)

locations <- read.csv("Health_Facility_General_Information.csv") %>% 
  clean_names()

cardiac <- read_csv("cardiac-surgery.csv") %>% 
  clean_names() %>% 
    filter(facility_id!=0)
## Rows: 1609 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): Hospital Name, Detailed Region, Region, Procedure, Year of Hospital...
## dbl (8): Facility ID, Number of Cases, Number of Deaths, Observed Mortality ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#COMPARING NUMBER OF TOTAL PROCEDURES WITH AVERAGE MORTALITY RATE Per procedure
total_cases <- cardiac %>% 
  group_by(facility_id, procedure) %>% 
  summarize(total_procedures = sum(number_of_cases), avg_mortality = mean(observed_mortality_rate)) %>%
  ggplot(aes(x = total_procedures, y = avg_mortality))+
  geom_point(color = "olivedrab") +
  labs(title = "Average Mortality Rate compared with Procedure Numbers ",
       x = "Total Number of Procedures",
       y = "Mortality Rate")+
  theme(axis.text.y = element_text(angle = 0, hjust = 1, size = 5))+theme_classic()+facet_wrap(~procedure)+scale_x_log10()+geom_smooth(method=lm, se=F)
## `summarise()` has grouped output by 'facility_id'. You can override using the
## `.groups` argument.
total_cases
## `geom_smooth()` using formula = 'y ~ x'

#COMPARING NUMBER OF TOTAL PROCEDURES WITH AVERAGE MORTALITY RATE

cardiac %>% 
  group_by(facility_id, procedure) %>% 
  summarize(total_procedures = sum(number_of_cases), avg_mortality = mean(observed_mortality_rate)) %>%
  ggplot(aes(x = total_procedures, y = avg_mortality))+
  geom_point(color = "olivedrab") +
  labs(title = "Average Mortality Rate compared with Procedure Numbers ",
       x = "Total Number of Procedures",
       y = "Mortality Rate")+
  theme(axis.text.y = element_text(angle = 0, hjust = 1, size = 5))+theme_classic()+scale_x_log10()+geom_smooth(method=lm, se=F)
## `summarise()` has grouped output by 'facility_id'. You can override using the
## `.groups` argument.
## `geom_smooth()` using formula = 'y ~ x'

locations <- locations %>% 
    group_by(facility_id) %>% 
    summarise(across(everything(), first)) 
cardiac <- left_join(cardiac, locations, by="facility_id")

Map:

#Overall deaths for all years:

lat <- c(40.58, 44.70) #we got tehse values from summary above
long <- c(-78.87, -72.98)
bbox <- make_bbox(long, lat, f = 0.03)

maptotal <- get_stadiamap(bbox, maptype = "stamen_terrain", zoom=7) #Make ur basemap, there are other styles which you could find from this link, https://docs.stadiamaps.com/themes/
## ℹ © Stadia Maps © Stamen Design © OpenMapTiles © OpenStreetMap contributors.
x <- cardiac %>% 
  arrange(hospital_name) %>% 
  group_by(facility_id, hospital_name, facility_latitude, facility_longitude) %>% 
  summarise(number_of_cases=sum(number_of_cases), average_moratality_rate=mean(observed_mortality_rate), .groups = 'keep') %>% 
  mutate(total_deaths=average_moratality_rate/100*number_of_cases)

ggmap(maptotal) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude)) + labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") 
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

register_stadiamaps("e2de824a-0995-4a51-915c-33c14c061e7b", write = FALSE) 
cardiac %>%  #GET MAX LAT AND LON FROM CODE BELOW!!! EASY MAKES SENSE
  select(facility_longitude, facility_latitude) %>% 
  summary()
##  facility_longitude facility_latitude
##  Min.   :-78.87     Min.   :40.58    
##  1st Qu.:-75.27     1st Qu.:40.74    
##  Median :-73.95     Median :40.88    
##  Mean   :-74.65     Mean   :41.61    
##  3rd Qu.:-73.80     3rd Qu.:42.73    
##  Max.   :-72.98     Max.   :44.70    
##  NA's   :65         NA's   :65
#For number of cases
ggmap(maptotal) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = number_of_cases)) + #WE just add the chart in without using ggplot! 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") 
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

#For moratlliry rate

ggmap(maptotal) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = average_moratality_rate)) + #WE just add the chart in without using ggplot! 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") 
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

#For total deaths
ggmap(maptotal) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = total_deaths)) + #WE just add the chart in without using ggplot! 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") 
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

#Zoom NYC:
lat1 <- c(40.58, 41.2) #we got tehse values from summary above
long1 <- c(-74.5, -72.98)
bbox1 <- make_bbox(long1, lat1, f = 0.03)

mapnyc <- get_stadiamap(bbox1, maptype = "stamen_terrain", zoom=8) 
## ℹ © Stadia Maps © Stamen Design © OpenMapTiles © OpenStreetMap contributors.
#NYC Rate of mortaility
ggmap(mapnyc) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = average_moratality_rate)) + 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") #colour??
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).

#NYC Total Cases
ggmap(mapnyc) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = number_of_cases)) + 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") #colour??
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).

#NYC total deaths
ggmap(mapnyc) + 
  geom_point(data = x, aes(facility_longitude, facility_latitude, colour  = total_deaths)) + 
  labs(x= "Longitude", y= "Latitude", title="Cardiac Locations") #colour??
## Warning: Removed 39 rows containing missing values or values outside the scale range
## (`geom_point()`).

Load the Data

cardiac %>% 
  group_by(facility_id) %>% 
  summarise(mean_observed_mortality=mean(observed_mortality_rate))
## # A tibble: 67 × 2
##    facility_id mean_observed_mortality
##          <dbl>                   <dbl>
##  1           1                   2.47 
##  2           5                   2.08 
##  3          58                   2.42 
##  4          66                   0.659
##  5         116                   2.35 
##  6         135                   2.34 
##  7         181                   1.49 
##  8         207                   2.10 
##  9         210                   1.42 
## 10         213                   1.98 
## # ℹ 57 more rows
cardiac %>% 
  group_by(facility_id) %>% 
  summarise(mean_observed_mortality=mean(observed_mortality_rate)) %>% 
  summarise(std=sd(mean_observed_mortality), mean_observed_mortality=mean(mean_observed_mortality), minmumum=min(mean_observed_mortality), maximum=max(mean_observed_mortality)) #Idk why this did not work.
## # A tibble: 1 × 4
##     std mean_observed_mortality minmumum maximum
##   <dbl>                   <dbl>    <dbl>   <dbl>
## 1  1.20                    2.05     2.05    2.05
x %>% 
  ggplot(aes(x=average_moratality_rate, y=total_deaths))+geom_point()

x %>% 
  ggplot(aes(x=number_of_cases, y=total_deaths))+geom_point()+geom_smooth(method=lm, se=F)
## `geom_smooth()` using formula = 'y ~ x'

Cleaning the Data

cardiac <- cardiac %>% 
  mutate(across(everything(), tolower))
cardiac <- cardiac %>%
  mutate(across(everything(), tolower)) %>% 
  mutate(across(everything(), ~ str_replace_all((.), " ", "_"))) %>% 
  mutate(across(everything(), ~ str_replace_all((.), "__", "_"))) %>% 
  mutate(across(-"year_of_hospital_discharge", ~ str_remove((.), "-"))) %>% 
  mutate(across(number_of_cases:risk_adjusted_mortality_rate, as.numeric))

Have to separate the data, as some of them are over a range of a couple of years, some of them are year by year

sites_with_ranges <- cardiac %>%
  filter(str_detect(year_of_hospital_discharge, "-", negate = FALSE))

sites_per_year <- cardiac %>% 
  filter(str_detect(year_of_hospital_discharge, "-", negate = TRUE))

Plots

What is the procedure with the highest observed mortality rate?

cardiac %>% 
  group_by(procedure) %>% 
  summarize(avg_mortality = mean(observed_mortality_rate)) %>% 
  arrange(-avg_mortality)
## # A tibble: 6 × 2
##   procedure           avg_mortality
##   <chr>                       <dbl>
## 1 tavr                        4.66 
## 2 valve_or_valve/cabg         3.80 
## 3 emergency_pci               3.12 
## 4 cabg                        1.52 
## 5 all_pci                     1.22 
## 6 nonemergency_pci            0.720
sites_per_year %>% 
  group_by(procedure) %>% 
  summarize(avg_mortality = mean(observed_mortality_rate)) %>% 
  ggplot(aes(x = reorder(procedure, avg_mortality), y = avg_mortality))+
  geom_col(color = "black", fill = "lightblue")+
  coord_flip()+
  labs(title = "Average Observed Mortality Rate by Procedure",
       x = "Procedure",
       y = "Mortality Rate")+
  theme_light(base_size = 12)

sites_with_ranges %>% 
  group_by(procedure) %>% 
  summarize(avg_mortality = mean(observed_mortality_rate)) %>% 
  ggplot(aes(x = reorder(procedure, avg_mortality), y = avg_mortality))+
  geom_col(color = "black", fill = "lightblue")+
  coord_flip()+
  labs(title = "Average Observed Mortality Rate by Procedure",
       x = "Procedure",
       y = "Mortality Rate")+
  theme_light(base_size = 12)

How has the mortality rate changed for cardiac surgery over time in each region?

#I'll come back to this later if I figure out the year stuff but feel free to take a crack at it :). This is mean observed Rate 
sites_per_year %>%
  group_by(year_of_hospital_discharge) %>% 
  summarize(observed_mortality_rate=mean(observed_mortality_rate)) %>% 
  ggplot(aes(x =year_of_hospital_discharge, y = observed_mortality_rate))+
  geom_col(color = "steelblue", na.rm = T) + geom_smooth(method = lm, se= TRUE) + 
  labs(title = "Regional Cardiac Surgery Morality Rates",
       x = "Year of Hospital Discharge",
       y = "Mean Observed Mortality Rate")
## `geom_smooth()` using formula = 'y ~ x'

sites_with_ranges %>%
    group_by(year_of_hospital_discharge) %>% 
  summarize(observed_mortality_rate=mean(observed_mortality_rate)) %>% 
  ggplot(aes(x = observed_mortality_rate, y = year_of_hospital_discharge))+
  geom_col(color = "steelblue") + geom_smooth(method = lm, se= TRUE) + 
  labs(title = "Regional Cardiac Surgery Morality Rates",
       x = "Mean Observed Mortality Rate",
       y = "Year of Hospital Discharge")
## `geom_smooth()` using formula = 'y ~ x'

How does the expected mortality rate compare to the observed mortality rate?

#I don't know which graph to plot for this one but if you can figure it out that would be great. I was thinking maybe scatter but I don't know for sure. I THINK SCATTER IS A GREAT CHOICE BUT IDk why we need two because they are still the same concept... my opinion is cardiac below:

cardiac %>%
  ggplot(aes(x = expected_mortality_rate, y = observed_mortality_rate))+
  geom_point(color = "steelblue") + geom_smooth(method = lm, se= TRUE) + 
  labs(title = "Expected vs Observed Mortality Rate",
       x = "Expected Mortality",
       y = "Observed Mortality")
## `geom_smooth()` using formula = 'y ~ x'

sites_per_year %>%
  ggplot(aes(x = expected_mortality_rate, y = observed_mortality_rate))+
  geom_point(color = "steelblue") + geom_smooth(method = lm, se= TRUE) + 
  labs(title = "Expected vs Observed Mortality Rate",
       x = "Expected Mortality",
       y = "Observed Mortality")
## `geom_smooth()` using formula = 'y ~ x'

sites_with_ranges %>%
  ggplot(aes(x = expected_mortality_rate, y = observed_mortality_rate))+
  geom_point(color = "steelblue") + geom_smooth(method = lm, se= TRUE) + 
  labs(title = "Expected vs Observed Mortality Rate",
       x = "Expected Mortality",
       y = "Observed Mortality")
## `geom_smooth()` using formula = 'y ~ x'

Which region has the highest mortality rate?

cardiac %>% 
  group_by(region) %>% 
  summarize(avg_mortality = mean(observed_mortality_rate)) %>% 
  arrange(-avg_mortality)
## # A tibble: 7 × 2
##   region                 avg_mortality
##   <chr>                          <dbl>
## 1 western_ny__rochester           2.39
## 2 central_ny                      2.30
## 3 ny_metro__nyc                   2.19
## 4 capital_district                1.86
## 5 ny_metro__new_rochelle          1.80
## 6 ny_metro__long_island           1.74
## 7 western_ny__buffalo             1.67
cardiac %>% 
  group_by(region) %>% 
  summarize(avg_mortality = mean(observed_mortality_rate)) %>% 
  ggplot(aes(x = reorder(region, avg_mortality), y = avg_mortality))+
  geom_col(color = "black", fill = "lightpink")+
  coord_flip()+
  labs(title = "Average Observed Mortality Rate Per Region",
       x = "Region",
       y = "Mortality Rate")+
  theme_light(base_size = 12)

Shiny App

Don’t mind the code and names for this, I’m testing out some stuff :D

library(shinydashboard)
## 
## Attaching package: 'shinydashboard'
## The following object is masked from 'package:graphics':
## 
##     box
ui <- dashboardPage(skin = "purple",
                    
  dashboardHeader(title = "Cardiac Dashboard"),
  
  ## Sidebar content
  dashboardSidebar(
    sidebarMenu(
      
      menuItem("Observed Mortality Rate", 
               tabName = "dashboard", 
               icon = icon("dashboard")),
      
      menuItem("Counts", 
               tabName = "widgets", 
               icon = icon("th")),
      menuItem("Cases",
               tabName = "cases",
               icon = icon("folder-open")),
      menuItem("Maps",
               tabName = "map",
               icon = icon("earth-americas"))
    )
    ),
  
  ## Body content
  dashboardBody(
    
    tabItems(
  
      ## First tab content    
      tabItem(tabName = "dashboard",
        fluidRow(
          box(plotOutput("plot1a", height = 250)), # box is a container for the plot
          box(title = "Controls", # box is a container for the controls
              selectInput("region", 
                          "Select Region of Interest:", 
                          choices=unique(cardiac$region))
          )
          ),
        fluidRow(
          box(plotOutput("plot1b", height = 250)),
          box(title = "Controls",
              selectInput("year",
                          "Select Year of Interest:",
                          choices = unique(cardiac$year_of_hospital_discharge)))
          )
          ),

      ## Second tab item 
      tabItem(tabName = "widgets",
        fluidRow(
          box(plotOutput("plot2", height = 250)), # box is a container for the plot
          box(title = "Controls", # box is a container for the controls
              radioButtons("x", 
               "Select Fill Variable", 
               choices=c("region", "procedure"),
               selected="region")
          )
          )
          ),
      
      ## Third tab item
      tabItem(tabName = "cases",
        fluidRow(
          box(plotOutput("plot3a", height = 250)), # box is a container for the plot
          box(title = "Controls", # box is a container for the controls
              selectInput("a", 
               "Select Hospital", 
               choices=c(unique(cardiac$hospital_name))
          )
          )
          ),
        fluidRow(
          box(plotOutput("plot3b", height = 250)), # box is a container for the plot
          box(title = "Controls", # box is a container for the controls
              selectInput("b", 
               "Select Hospital", 
               choices=c(unique(cardiac$hospital_name))
          )
          )
          )
          ),
        
        tabItem(tabName = "map",
          fluidRow(
            box(plotOutput("plot4a", height = 250)),
            box(title = "Controls",
                selectInput("m",
                            "Select Region",
                             choices = c(unique(cardiac$region))
          
          )
          )
          ),
          fluidRow(
            box(plotOutput("plot4b", height = 450))
          )
          )
          )
          )
  )
          
server <- function(input, output, session) {
  
  session$onSessionEnded(stopApp)

  output$plot1a <- renderPlot({

    cardiac %>%
      filter(region == input$region) %>%
      ggplot(aes(x = observed_mortality_rate))+ 
      geom_density(color = "black", fill = "steelblue", alpha = 0.6)
  })
  
  output$plot1b <- renderPlot({

    cardiac %>%
      filter(year_of_hospital_discharge == input$year) %>%
      ggplot(aes(x = observed_mortality_rate))+ 
      geom_density(color = "black", fill = "steelblue", alpha = 0.6)
  })
  
    output$plot2 <- renderPlot({
    
    cardiac %>% 
      ggplot(aes_string(x="region", fill=input$x))+
      geom_bar(position="dodge", alpha=0.8, color="black")+
      labs(x=NULL, y=NULL, fill="Fill Variable")
    
  })
    
    output$plot3a <- renderPlot({
    
    sites_per_year %>% 
      filter(hospital_name == input$a) %>% 
      group_by(year_of_hospital_discharge) %>% 
      summarize(total_cases_a = sum(number_of_cases)) %>% 
      ggplot(aes_string(x="year_of_hospital_discharge", y = "total_cases_a"))+
      coord_flip()+
      geom_col(alpha=0.8, color="black", fill = "steelblue")+
      labs(title = "Number of Cases per Year",
           x= "Year", 
           y= "Number of Cases")+
        theme_light(base_size = 14)
    
  })
    
    
  output$plot3b <- renderPlot({
    
    sites_with_ranges %>% 
      filter(hospital_name == input$b) %>% 
      group_by(year_of_hospital_discharge) %>% 
      summarize(total_cases_b = sum(number_of_cases)) %>% 
      ggplot(aes_string(x="year_of_hospital_discharge", y = "total_cases_b"))+
      coord_flip()+
      geom_col(alpha=0.8, color="black", fill = "lightblue")+
      labs(title = "Number of Cases per Year Range",
           x= "Year Range", 
           y= "Number of Cases")+
      theme_light(base_size = 14)
    
  })
  
  output$plot4a <- renderPlot({
    
      ggmap(map2)+
      geom_point(data = cardiac,
             aes(facility_longitude, facility_latitude),
             na.rm = T) #trying to use the select input
    
  })
  
  output$plot4b <- renderPlot({
    
    ggmap(map1) + 
    geom_point(data = cardiac, 
               aes(facility_longitude, facility_latitude, size = number_of_cases, color = number_of_cases),
               na.rm = T)+
    labs(x= "Longitude", y= "Latitude", title="Cardiac Locations")
    
  })
}

shinyApp(ui, server)
Shiny applications not supported in static R Markdown documents